library(tidyverse)
library(sf)
library(here)
library(patchwork)
library(Polychrome)
library(ggrepel)
library(Metrics)
library(gt)
library(geojsonsf)


# Save this script and the R project in the directory where you downloaded the data from
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/7T7QDH


# Boulder ------------------------------------------------------------------

## Segments ####
# Analysis of roof segments for Boulder validation.

### Metadata ####
# Load the metadata files
metadata <- list.files(here(), pattern = "metadata", full.names = TRUE) %>% 
  map_df(read_delim)

# Create a table of the NEON solar zeniths
neon_zenith <- metadata %>% 
  filter(imager == "NEON") %>% 
  select(Flightline = NEON_flight_ID, solarZenith, solarAzimuth) %>% 
  distinct() 

# Tidy the data
metadata <- metadata %>% 
  mutate(solarAzimuth = coalesce(solarAzimuth, solarAzimuthAngle, S2_MEAN_SOLAR_AZIMUTH_ANGLE),
         solarElevation = case_when(imager == "NEON" ~ 90 - solarZenith,
                                    imager == "S2" ~ 90 - S2_MEAN_SOLAR_ZENITH_ANGLE),
         solarElevation = coalesce(solarElevation, solarElevationAngle)) %>% 
  select(-matches("S2_MEAN"),
         -matches("Angle"),
         -matches("Zenith")) %>% 
  rename(albedoDate = PNeo_cal_middate,
         PNeoDate = PNeo_date_string) %>% 
  mutate(albedoDate = as.character(albedoDate),
         albedoDate = case_when(imager == "NEON" ~ NEON_flight_ID,
                                .default = albedoDate),
         imager = case_when(imager == "PNEO" ~ "PNeo", .default = imager)) %>% 
  select(- S2_mid_date_for_median,
         - NEON_flight_ID,
         - `system:index`,
         - `.geo`) %>% 
  distinct()


### Segments ####
# Load the roof segments
read_geo <- function(path) {
  delim <- if (str_detect(path, "\\.csv$")) "," else "\t"
  read_delim(
    file = path,
    delim = delim,
    show_col_types = FALSE,
    escape_backslash = TRUE,  
    escape_double   = TRUE,
    quote = "\""
  )
}

segments_full <- list.files(here(), pattern = "roofsegments", full.names = TRUE) %>% 
  keep(~ !str_detect(basename(.x), "metadata")) %>% 
  map_df(read_geo) %>% 
  mutate(ID = row_number(),
         geometry = geojson_sf(.geo)$geometry) %>%
  st_as_sf() %>% 
  st_transform(32613) %>% 
  mutate(segment_area = as.numeric(st_area(.))) %>% 
  st_drop_geometry()

# Pivot the observations to long format
segments_long <- segments_full %>% 
  pivot_longer(
    cols = matches("^(NEON|PNeo|S2)_.*_albedo_.*$"),
    names_to = c("imager", "Date", ".value"),
    names_pattern = "^(NEON|PNeo|S2)_(.+)_albedo_(.+)$"
  ) %>% 
  filter(if_any(c(mean, median, stdDev), ~ !is.na(.))) %>% 
  separate(Date, into = c("PNeoDate", "albedoDate"), sep = "_cal_", fill = "right") %>% 
  separate(albedoDate, into = c("albedoDate", "type"), sep = "_", extra = "merge") %>% 
  mutate(albedoDate = case_when(imager == "NEON" ~ PNeoDate, .default = albedoDate),
         PNeoDate = case_when(imager == "NEON" ~ NA, .default = PNeoDate)) %>% 
  left_join(metadata, by = c("PNeoDate", "albedoDate", "imager")) %>% 
  mutate(albedoDate = case_when(imager == "NEON" ~ "2024-05-08",
                                .default = albedoDate)) 

### Calculate surface albedo ####

# Load functions
source(here("surface-albedo.R"))

segments_long <- segments_long %>% 
  rowwise() %>% 
  mutate(normal = list(unit_normal(segment_pitch, segment_azimuth)),
         alpha_s = compute_segment_surface_albedos(solarZenithDeg = (90 - solarElevation), 
                                                   solarAzimuthDeg = solarAzimuth, 
                                                   albedoMedian = median, 
                                                   normal = normal, 
                                                   pitchDegrees = segment_pitch)) %>% 
  select(-normal)

# Summarize the NEON surface albedo
neon <- segments_long %>% 
  filter(imager == "NEON") %>% 
  group_by(ID) %>% 
  summarise(NEON_stdDev_alpha_s = sd(alpha_s, na.rm = TRUE),
            NEON_stdDev_med = sd(median, na.rm = TRUE),
            NEON_surface_albedo = median(alpha_s, na.rm = TRUE),
            NEON_image_albedo = median(median, na.rm = TRUE))

# Exclude 2023, make wide, and join to NEON observations
segments_wide <- segments_long %>% 
  filter(imager != "NEON") %>% 
  left_join(neon, by = "ID") %>% 
  ungroup()

# Dataset comparison
all_data_gof <- segments_wide %>%
  filter(NEON_stdDev_alpha_s < 0.01) %>%
  mutate(PNeoDate = str_extract(PNeoDate, "^[^_]+")) %>% 
  group_by(imager, type, albedoDate, PNeoDate) %>%
  nest() %>%
  mutate(
    n = map_int(data, nrow),
    surface_rmse = map_dbl(data, ~ round(rmse(.x$NEON_surface_albedo, .x$alpha_s), 3)),
    surface_bias = map_dbl(data, ~ round(bias(.x$NEON_surface_albedo, .x$alpha_s), 3) * -1),
    surface_rsquare = map_dbl(data, ~ round(summary(lm(NEON_surface_albedo ~ alpha_s, data = .x))$r.squared, 3)),
    image_rmse = map_dbl(data, ~ round(rmse(.x$NEON_image_albedo, .x$median), 3)),
    image_bias = map_dbl(data, ~ round(bias(.x$NEON_image_albedo, .x$median), 3) * -1),
    image_rsquare = map_dbl(data, ~ round(summary(lm(NEON_image_albedo ~ median, data = .x))$r.squared, 3)),
    label_s = paste0("RMSE = ", surface_rmse, "\n", 
                     "R-squared = ", surface_rsquare, "\n",
                     "bias = ", surface_bias),
    label_i = paste0("RMSE = ", image_rmse,
                     ", R-squared = ", image_rsquare,
                     ", bias = ", image_bias)
  ) %>%
  select(-data) %>%
  ungroup() 


# Table of goodness-of-fit values ####

# Step 1: Get it in long format (surface/image per row)
albedo_summary_long <- all_data_gof %>%
  select(albedoDate, PNeoDate, n, type,
         surface_rsquare, surface_rmse, surface_bias,
         image_rsquare, image_rmse, image_bias) %>%
  pivot_longer(
    cols = matches("^(surface|image)_(rmse|rsquare|bias)$"),
    names_to = c("source", "metric"),
    names_sep = "_",
    values_to = "value"
  ) %>%
  mutate(source = ifelse(source == "surface", "Surface", "Image")) %>%
  pivot_wider(
    names_from = type,
    values_from = value
  ) %>% 
  select(!reprojected) %>% 
  rename("CC" = "unreprojected",
         "CT" = "unreprojected_CT",
         "S2" = "registered") %>% 
  mutate(type = case_when(albedoDate == "2024-04-30" ~ "Single date", .default = "100-day median"))


# Step 2: Pivot wider to get RMSE, R², Bias as top-level headers
albedo_summary_wide <- albedo_summary_long %>%
  pivot_wider(
    names_from = metric,
    values_from = c(CC, CT, S2),
    names_glue = "{metric}_{.value}"
  ) %>%
  select(albedoDate, PNeoDate, n, source, everything()) %>%
  arrange(desc(type), albedoDate, PNeoDate, source) %>% 
  relocate(type)


# Step 3: Format with gt
## Table 1: Single-date image ####
albedo_summary_wide %>%
  filter(albedoDate == "2024-04-30") %>% 
  select(! c(type, albedoDate)) %>% 
  mutate(PNeoDate = case_when(PNeoDate == "2023-05-23" ~ "May 5, 2023", 
                              PNeoDate == "2024-05-08" ~ "May 8, 2024",
                              PNeoDate == "2024-05-16" ~ "May 16, 2024",
                              .default = PNeoDate),
         n = as.factor(n)) %>% 
  gt() %>%
  cols_label(
    PNeoDate = "High-res Date",
    n = "N",
    source = "Albedo Type",
    
    # RMSE
    rmse_CT = "Transfer",
    rmse_CC = "Convolution",
    rmse_S2 = "Sentinel-2",
    
    # R-squared
    rsquare_CT = "Transfer",
    rsquare_CC = "Convolution",
    rsquare_S2 = "Sentinel-2",
    
    # Bias
    bias_CT = "Transfer",
    bias_CC = "Convolution",
    bias_S2 = "Sentinel-2"
  ) %>%
  tab_spanner(label = "RMSE", columns = starts_with("rmse_")) %>%
  tab_spanner(label = "R²", columns = starts_with("rsquare_")) %>%
  tab_spanner(label = "Bias", columns = starts_with("bias_")) %>%
  fmt_number(columns = where(is.numeric), decimals = 3) %>%
  tab_options(table.font.size = px(13)) %>% 
  as_latex()

## Table 2: temporal medians ####
albedo_summary_wide %>%
  filter(albedoDate != "2024-04-30") %>% 
  select(! c(type, albedoDate, ends_with("CT"))) %>% 
  left_join(albedo_summary_wide %>%
              filter(albedoDate == "2024-04-30") %>% 
              select(PNeoDate, source, ends_with("CC")) %>% 
              rename_with(~ gsub("CC$", "SD", .), ends_with("CC"))) %>% 
  relocate(ends_with("SD"), .before = ends_with("CC")) %>% 
  rename_with(~ gsub("S2$", "S2_TM", .), ends_with("S2")) %>% 
  left_join(albedo_summary_wide %>%
              filter(albedoDate == "2024-04-30") %>% 
              select(PNeoDate, source, ends_with("S2")) %>% 
              rename_with(~ gsub("S2$", "S2_SD", .), ends_with("S2"))) %>% 
  relocate(ends_with("TM"), .after = ends_with("S2_SD")) %>% 
  mutate(PNeoDate = case_when(PNeoDate == "2023-05-23" ~ "May 5, 2023", 
                              PNeoDate == "2024-05-08" ~ "May 8, 2024",
                              PNeoDate == "2024-05-16" ~ "May 16, 2024",
                              .default = PNeoDate)) %>% 
  gt() %>%
  cols_label(
    PNeoDate = "High-res Date",
    source = "Albedo Type",
    
    # RMSE
    rmse_SD = "Single date",
    rmse_CC = "Temporal median",
    rmse_S2_SD = "Single date",
    rmse_S2_TM = "Temporal median",
    
    # R-squared
    rsquare_SD = "Single date",
    rsquare_CC = "Temporal median",
    rsquare_S2_SD = "Single date",
    rsquare_S2_TM = "Temporal median",
    
    # Bias
    bias_SD = "Single date",
    bias_CC = "Temporal median",
    bias_S2_SD = "Single date",
    bias_S2_TM = "Temporal median"
  ) %>%
  tab_spanner(label = "RMSE", columns = starts_with("rmse_")) %>%
  tab_spanner(label = "R²", columns = starts_with("rsquare_")) %>%
  tab_spanner(label = "Bias", columns = starts_with("bias_")) %>%
  tab_spanner(label = "Sentinel-2", columns = contains("S2")) %>%
  tab_spanner(label = "High-res", columns = !contains(c("S2", "PNeoDate", "source"))) %>%
  fmt_number(columns = where(is.numeric), decimals = 3) %>%
  tab_options(table.font.size = px(13)) %>% 
  as_latex()


# Only reprojected PNEO 

segments <- segments_long %>% 
  filter(imager != "NEON") %>% 
  filter(type == "unreprojected" | type == "registered") %>% 
  left_join(neon, by = "ID") %>% 
  ungroup()

### Goodness-of-fit ####

segments_all <- segments %>% 
  filter(NEON_stdDev_alpha_s < 0.01,
         albedoDate != "2024-04-30") %>%
  group_by(imager) %>% 
  nest() %>% 
  mutate(n = map(data, ~ nrow(.)),
         surface_rmse =  map(data, ~ round(rmse(.$NEON_surface_albedo, .$alpha_s), 3)),
         surface_bias = map(data, ~ round(bias(.$NEON_surface_albedo, .$alpha_s), 3) * -1),
         surface_rsquare = map(data, ~ round(summary(lm(.$NEON_surface_albedo ~ .$alpha_s))$r.squared, 3)),
         image_rmse =  map(data, ~ round(rmse(.$NEON_image_albedo, .$median), 3)),
         image_bias = map(data, ~ round(bias(.$NEON_image_albedo, .$median), 3) * -1),
         image_rsquare = map(data, ~ round(summary(lm(.$NEON_image_albedo ~ .$median))$r.squared, 3))) %>% 
  unnest(data) %>% 
  mutate(label_s = paste0("RMSE = ", surface_rmse, "\n", 
                          "R-squared = ", surface_rsquare, "\n",
                          "bias = ", surface_bias),
         label_i = paste0("RMSE = ", image_rmse,
                          ", R-squared = ", image_rsquare,
                          ", bias = ", image_bias)) %>% 
  ungroup()

## Overture Buildings ####

# Function to read files
read_files <- function(file){
  
  dat <- read_geo(file)
  
  # Extract metadata from file names
  calibration_mid <- file %>% 
    str_sub(1, 10)
  
  PNeo_date <- file %>% 
    str_sub(28, 46)
  
  dat <- dat %>% 
    rename(ID = 'system:index') %>% 
    mutate(calibration_mid = as_date(calibration_mid),
           PNeo_date = PNeo_date, 
           ID = as.character(ID))
  
}

### Load Overture buildings ####
overture <- list.files(here(), pattern = "overture_samples_flight_lines") %>% 
  {.[!str_detect(., "params")]} %>% 
  map_df(read_files) %>% 
  mutate(geometry = geojsonsf::geojson_sfc(.geo)) %>%
  sf::st_as_sf() %>% 
  select(!15:19) %>% 
  select(!16:28) %>% 
  drop_na(2:14) %>% 
  st_transform(32613) %>% 
  mutate(sample = "Overture building",
         area_m2 = as.numeric(st_area(.))) 

# Load the overture buildings with the minimum zenith angle from the NEON flightlines
overture_buildings <- st_read(here("overture-boulder-min-zenith.geojson")) 

# Join min zenith to buildings by id and summarize the albedo observations
# for each building footprint
overture_zenith <- overture_buildings %>% 
  st_drop_geometry() %>% 
  group_by(id) %>% 
  summarise(NEON_alb_sd = sd(med_alb, na.rm = TRUE),
            NEON_zenith_range = max(solarZenith, na.rm = TRUE) - min(solarZenith, na.rm = TRUE),
            NEON_alb_min_zenith = med_alb[which.min(solarZenith)],
            n = n()) 

mean(overture_zenith$NEON_alb_sd, na.rm = T)

overture <- overture %>%
  select(!contains("NEON")) %>%
  left_join(overture_zenith, by = "id") %>%
  drop_na(n) %>%
  select(!contains("mean") & !contains("stdDev") & !ID) %>%
  pivot_longer(cols = 1:2, names_to = "Var", values_to = "Albedo") %>%
  separate_wider_delim(Var, "_", names = c("Sensor", "x", "Var")) %>%
  select(-c(x, n, Var)) %>%
  group_by(Sensor, id) %>%
  summarise(
    Albedo = median(Albedo, na.rm = TRUE),
    NEON_median = first(NEON_alb_min_zenith),
    NEON_zenith_range = first(NEON_zenith_range),
    NEON_alb_sd = first(NEON_alb_sd),
    area = first(area_m2),
    geometry = first(geometry),  # ✅ manually preserve geometry
    .groups = "drop"
  ) %>%
  drop_na(NEON_median) %>% 
  st_as_sf()  # ✅ reattach sf class if needed


## Plots ####


# Figure 4 ----------------------------------------------------------------


### Segments ####
seg_plot <- segments_all %>% 
  mutate(imager = case_when(imager == "PNeo" ~ "High-res", .default = imager)) %>% 
  ggplot(aes(x = NEON_surface_albedo, y = alpha_s)) +
  geom_hex(binwidth = 0.01) +
  scale_fill_viridis_c(option = "magma", direction = -1) +   # Use a continuous color scale
  scale_alpha(range = c(0.1, 0.8)) + 
  geom_abline(intercept = 0, slope = 1, color = "black", linetype="dotted") +
  geom_smooth(method = "lm", se = FALSE, color = "black", lwd = 0.3) +
  xlim(0, .9) +
  ylim(0, 1) +
  xlab("NEON surface albedo") +
  ylab("Surface albedo") +
  coord_equal() +
  theme_bw(base_size = 12) +
  theme(plot.background = element_rect(fill = NA, color = NA)) +
  facet_wrap(~ imager) +
  geom_text(
    mapping = aes(x = 0, y = 1, label = label_s),
    hjust   = "left",
    nudge_y = -0.15,
    size.unit = "pt",
    size = 10
  ) +
  guides(fill = "none") +
  labs(subtitle = "Roof segments")

### Overture medians ####

build_plot <- overture %>% 
  group_by(Sensor) %>% 
  nest() %>% 
  mutate(n = map(data, ~ nrow(.)),
         image_rmse =  map(data, ~ round(rmse(.$NEON_median, .$Albedo), 3)),
         image_bias = map(data, ~ round(bias(.$NEON_median, .$Albedo), 3) * -1),
         image_rsquare = map(data, ~ round(summary(lm(.$NEON_median ~ .$Albedo))$r.squared, 3))) %>% 
  unnest(data) %>% 
  mutate(label_i = paste0("RMSE = ", image_rmse, "\n", 
                          "R-squared = ", image_rsquare, "\n",
                          "bias = ", image_bias),
         Sensor = case_when(Sensor == "PNeo" ~ "High-res", .default = Sensor)) %>% 
  ggplot(aes(x = NEON_median, y = Albedo)) +
  geom_hex(binwidth = 0.01) +
  scale_fill_viridis_c(option = "magma", direction = -1) +   # Use a continuous color scale
  scale_alpha(range = c(0.1, 0.8)) +
  geom_abline(intercept = 0, slope = 1, color = "black", linetype="dotted") +
  geom_smooth(method = "lm", se = FALSE, color = "black", lwd = 0.3) +
  xlim(0, .9) +
  ylim(0, 1) +
  xlab("NEON image albedo") +
  ylab("Image albedo") +
  coord_equal() +
  theme_bw(base_size = 12) +
  theme(plot.background = element_rect(fill = NA, color = NA)) +
  facet_wrap(~ Sensor) +
  geom_text(
    mapping = aes(x = 0, y = 1, label = label_i),
    hjust   = "left",
    nudge_y = -0.15,
    size.unit = "pt",
    size = 10
  )  +
  guides(fill = "none") +
  labs(subtitle = "Building footprints")

### Density ####

density_df <- segments_all %>% 
  select(Albedo = alpha_s, Sensor = imager) %>% 
  bind_rows(segments_all %>% 
              select(Albedo = NEON_surface_albedo, ids) %>% 
              distinct() %>% 
              mutate(Sensor = "NEON")) %>% 
  mutate(dataset = "Roof segments") %>% 
  bind_rows(overture %>% 
              bind_rows(overture %>% 
                          select(Albedo = NEON_median, id) %>% 
                          distinct() %>% 
                          mutate(Sensor = "NEON")) %>% 
              select(Sensor, Albedo) %>% 
              mutate(dataset = "Building footprints")) 

dens_plot <- ggplot(density_df) +
  geom_density(aes(x = Albedo, color = Sensor)) +
  scale_color_manual(name = "", values = c("PNeo" = "red", "S2" = "blue", "NEON" = "lightblue")) +
  facet_wrap(~ factor(dataset, c("Roof segments", "Building footprints")), ncol = 1) +
  theme_bw(base_size = 12) +
  xlim(0, 0.4) +
  ylab("Density") +
  theme(
    legend.position = "inside",
    legend.position.inside = c(.8, .9), # c(0,0) bottom left, c(1,1) top-right.
    legend.background = element_rect(fill = "transparent", colour = NA)
  )
  
layout <- "
ABB
ACC
"

dens_plot + seg_plot + build_plot +
  plot_layout(design = layout) + plot_annotation(tag_levels = 'a')


# Fig. 3 ------------------------------------------------------------------

segments_unreprojected <- segments %>% 
  select(Albedo = median, NEON = NEON_image_albedo, segment_pitch, 
         segment_azimuth, roof_area_m2, imager, albedoDate, segment_area) %>% 
  drop_na() %>% 
  filter(albedoDate != "2024-04-30")

# pitch
pitch_model_unreprojected <- segments_unreprojected %>% 
  mutate(
    range = cut(
      segment_pitch, 
      breaks = quantile(segment_pitch, probs = seq(0, 1, 0.1), na.rm = TRUE), 
      include.lowest = TRUE
    ),
    bin = as.integer(range) + 20
  ) %>% 
  group_by(bin, imager, range) %>% 
  nest() %>% 
  mutate(
    RMSE = map(data, ~ round(rmse(.$NEON, .$Albedo), 3)),
    Bias = map(data, ~ round(bias(.$NEON, .$Albedo), 3) * -1),
    Rsquare = map(data, ~ round(summary(lm(NEON ~ Albedo, data = .x))$r.squared, 3))
  ) %>% 
  select(-data) %>%
  unnest(cols = c(RMSE, Bias, Rsquare)) %>% 
  mutate(model = "Pitch") %>% 
  ungroup() %>% 
  pivot_longer(cols = c(RMSE, Bias, Rsquare), names_to = "Metric", values_to = "Value")

## azimuth 
azimuth_model_unreprojected <- segments_unreprojected %>% 
  mutate(
    range = cut(
      segment_azimuth, 
      breaks = quantile(segment_azimuth, probs = seq(0, 1, 0.1), na.rm = TRUE), 
      include.lowest = TRUE
    ),
    bin = as.integer(range) + 10
  ) %>% 
  group_by(bin, imager, range) %>% 
  nest() %>% 
  mutate(
    RMSE = map(data, ~ round(rmse(.$NEON, .$Albedo), 3)),
    Bias = map(data, ~ round(bias(.$NEON, .$Albedo), 3) * -1),
    Rsquare = map(data, ~ round(summary(lm(NEON ~ Albedo, data = .x))$r.squared, 3))
  ) %>% 
  select(-data) %>%
  unnest(cols = c(RMSE, Bias, Rsquare)) %>% 
  mutate(model = "Azimuth") %>% 
  ungroup() %>% 
  pivot_longer(cols = c(RMSE, Bias, Rsquare), names_to = "Metric", values_to = "Value")


## area
area_model_unreprojected <- segments_unreprojected %>% 
  mutate(
    # range = cut(
    #   roof_area_m2, 
    #   breaks = quantile(roof_area_m2, probs = seq(0, 1, 0.1), na.rm = TRUE), 
    #   include.lowest = TRUE
    range = cut(
      segment_area, 
      breaks = quantile(segment_area, probs = seq(0, 1, 0.1), na.rm = TRUE), 
      include.lowest = TRUE
    ),
    bin = as.integer(range) 
  ) %>% 
  group_by(imager, range, bin) %>% 
  nest() %>% 
  mutate(
    RMSE = map(data, ~ round(rmse(.$NEON, .$Albedo), 3)),
    Bias = map(data, ~ round(bias(.$NEON, .$Albedo), 3) * -1),
    Rsquare = map(data, ~ round(summary(lm(NEON ~ Albedo, data = .x))$r.squared, 3))
  ) %>% 
  select(-data) %>%
  unnest(cols = c(RMSE, Bias, Rsquare)) %>% 
  mutate(model = "Area") %>% 
  ungroup() %>% 
  pivot_longer(cols = c(RMSE, Bias, Rsquare), names_to = "Metric", values_to = "Value")

all_roof_models <- area_model_unreprojected %>% 
  bind_rows(azimuth_model_unreprojected) %>% 
  bind_rows(pitch_model_unreprojected) 

bin_labels <- all_roof_models %>%
  group_by(model, bin) %>%
  summarise(upper_label = first(range), .groups = "drop") 

ggplot(all_roof_models %>% mutate(imager = case_when(imager == "PNeo" ~ "High-res", .default = imager))) +
  geom_point(aes(x = bin, y = Value, color = imager), size = 3) +
  geom_line(aes(x = bin, y = Value, color = imager, group = imager)) +  # Ensure grouping
  geom_hline(data = all_roof_models %>% filter(Metric %in% c("Bias")), 
             aes(yintercept = 0),  
             linetype = "dashed", color = "black") +
  
  # Set custom colors for PNeo and S2
  scale_color_manual(name = "", values = c("High-res" = "red", "S2" = "blue")) +
  
  # Ensure x-axis labels use custom labels
  scale_x_continuous(
    breaks = bin_labels$bin, 
    labels = bin_labels$upper_label,
    guide = guide_axis(angle = 45)
  ) +
  
  facet_grid(cols = vars(model), rows = vars(Metric), scales = "free") +
  ggtitle("Goodness of fit across roof variables") +
  xlab("Decile grouping") +
  
  theme(strip.text.y.left = element_text(angle = 0), 
        strip.placement = "outside") +
  theme_bw(base_size = 14) +
  theme(plot.background = element_rect(fill = NA, color = NA)) +
  ylab("")



# Boulder scenarios -------------------------------------------------
# Load the AOI
city_area <- st_read(here("buildings-city-limits-intersect.geojson")) %>% 
  st_transform(32613) %>% 
  mutate(area = as.numeric(st_area(.)))

# Select the NEON observations
neon_full <- overture %>% 
  select(Albedo = NEON_median, area, id) %>% 
  distinct() %>%
  mutate(Sensor = "NEON") %>% 
  drop_na()

# Join the NEON observations to the S2 and high-res observations to make the 
# data wide and calculate the effective area (cool roof albedo = 0.55)
overture_full <- overture %>% 
  filter(id %in% neon_full$id) %>% 
  bind_rows(neon_full) %>% 
  mutate(alb_delta = pmax(0.55 - Albedo, 0),
         Eff_area = alb_delta * area)

# Filter to the AOI
boulder_overture <- overture_full %>% 
  st_filter(city_area, .predicate = st_within) %>% 
  st_drop_geometry()

# Stats by sensor for all buildings (scenario I)
boulder_all_builds <-  boulder_overture %>% 
  group_by(Sensor) %>% 
  summarise(n = sum(Eff_area > 0),
            Eff_area = sum(Eff_area),
            City_area = city_area$area,
            Sensor = first(Sensor),
            area = sum(area),
            pct_area = area / city_area$area) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3)) 

# Specify the area threshold for scenario II
boulder_area_threshold <- quantile(boulder_overture$area, 0.9)

# Stats by sensor for large buildings (scenario II)
boulder_large_builds <- boulder_overture %>% 
  filter(area > boulder_area_threshold) %>% 
  group_by(Sensor) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = city_area$area,
            Sensor = first(Sensor),
            area = sum(area),
            n = n(),
            pct_area = area / city_area$area) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3)) 

# Specify the albedo threshold for scenario III
boulder_delta_threshold <- boulder_overture %>% 
  ungroup() %>% 
  filter(Sensor == "PNeo") %>% 
  summarize(t = quantile(alb_delta, 0.9)) %>% 
  pull()

# Stats by sensor for large buildings (scenario III)
boulder_delta_builds <- boulder_overture %>% 
  filter(alb_delta > boulder_delta_threshold) %>% 
  group_by(Sensor) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = city_area$area,
            Sensor = first(Sensor),
            area = sum(area),
            n = n(),
            pct_area = area / city_area$area) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3)) 

# All summaries
boulder_scenarios <- (boulder_all_builds %>% 
                mutate(Scenario = "All buildings")) %>% 
  bind_rows((boulder_large_builds %>% 
               mutate(Scenario = "Large buildings"))) %>% 
  bind_rows((boulder_delta_builds %>% 
               mutate(Scenario = "Low albedo buildings"))) %>% 
  mutate(Scenario = fct_relevel(Scenario, "All buildings", "Large buildings", "Low albedo buildings"),
         average = Alb_change * -0.4 / .10,
         pct_area = round(pct_area, 3))

boulder_scenarios %>% 
  ggplot() +
  geom_col(aes(x = Scenario, y = Alb_change, fill = Sensor, group = Sensor), 
           position = position_dodge(),
           color = "black") +
  scale_fill_manual(values = c("lightblue", "red", "blue")) +
  geom_label(aes(x = Scenario, y = Alb_change, label = paste0(average, "°C"), group = Sensor),
             position = position_dodge(width = 0.9), vjust = -0.2, size = 4) +
  xlab("") +
  ylab("City-wide albedo change") +
  labs(fill = NULL) +
  theme_bw(base_size = 18) 

# Scenario summary table
boulder_scenarios %>% 
  select(Sensor, n, , pct_area, Eff_area, Alb_change, Scenario, average) %>% 
  relocate(Scenario) %>% 
  gt() %>% 
  cols_label(
    Sensor = "Sensor",
    n = "N",
    pct_area = md("Percent of<br>city area"),
    Eff_area = md("Effective<br>area (m²)"),
    Alb_change = md("City-wide<br>albedo Δ"),
    Scenario = "Scenario",
    average = md("Temperature<br>Δ (°C)")
  ) %>%
  tab_options(table.font.size = px(13)) %>% 
  as_latex()


# Figure 6 ----------------------------------------------------------------
# Water masked mean albedo per city caluclated from Sentinel-2
city_albedo <- list.files(here(), pattern = "City_Medians_water_masked") %>% 
  read_delim() %>% 
  select(Albedo = albedo, City_area = area, city) %>% 
  mutate(city = str_to_title(city),
         city = str_replace(city, "-", " "))

# Function to calculate effective area for each city and sensor

eff_area_calc <- function(file){
  
  dat <- read_geo(file) %>% 
    st_drop_geometry() %>% 
    summarise(city = first(city),
              PNeo_eff_area = sum(PNeo_eff_area, na.rm = TRUE),
              S2_eff_area = sum(S2_eff_area, na.rm = TRUE),
              build_area = sum(area))
}

# Create a df of cities and effective area
eff_area_cities <- list.files(here(), pattern = "overture", full.names = TRUE) %>% 
  keep(~ !str_detect(basename(.x), "PNeo|zenith"))  %>% 
  map_df(eff_area_calc) %>% 
  mutate(city = str_to_title(str_replace(city, "-", " "))) 

# Join city-wide albedo and calculate the build area as a percent of the total area
eff_area_cities <- eff_area_cities %>% 
  left_join(city_albedo, by = "city") %>% 
  rename(City = city,
         "City-wide median albedo" = Albedo) %>% 
  mutate(build_pct = build_area / City_area)
 
# Pivot longer
eff_area_long <- eff_area_cities %>% 
  pivot_longer(cols = c(PNeo_eff_area, S2_eff_area), 
               names_to = "Sensor", values_to = "Effective area",
               names_pattern = "(.*)_eff_area") %>% 
  mutate(`Albedo increase` = `Effective area` /  City_area) %>% 
  filter(City != "Almeria") 


# Figure 6 ----------------------------------------------------------------


random_colors <- kelly.colors(14)[3:14]
names(random_colors) <- unique(eff_area_long$City)

# Cooling potential from Krayenhoff et al. 2021
# 0.2-0.6°C * effective area
eff_area_long <- eff_area_long %>% 
  mutate(lower = `Albedo increase` * -0.2 / .10,
         upper = `Albedo increase` * -0.6 / .10,
         average = `Albedo increase` * -0.4 / .10,
         `City area (km²)` = City_area / 1e6)

temp_change_plot <- eff_area_long %>% 
  mutate(end_albedo = `City-wide median albedo` + `Albedo increase`) %>% 
  filter(Sensor == "PNeo") %>% 
  ggplot(aes(x = `City-wide median albedo`, y = average, color = City, group = City)) +
  geom_point(size = 4) +
  geom_segment(aes(x = `City-wide median albedo`, xend = end_albedo, y = average, color = City, group = City),
               arrow = arrow(type = "closed", length = unit(0.1, "inches"))) +
  geom_label_repel(data = eff_area_long %>% 
                     group_by(City) %>%
                     slice(2),
                   aes(label = City), 
                   segment.color = NA,
                   nudge_x = 0.001,   
                   nudge_y = 0,      
                   hjust = 0,    
                   vjust = 0.5,  
                   label.padding = 0.1,  
                   label.r = 0.1,        
                   label.size = 0.5,    
                   direction = "y",
                   force_pull = 2) +
  guides(color = "none") +
  ylab("Change in air temperature (°C)") +
  scale_color_manual(values = random_colors) +
  theme_bw(base_size = 12) 

eff_area_long %>% 
  filter(Sensor == "PNeo") %>% 
  select(City, build_pct, `City-wide median albedo`, `Albedo increase`, average) %>% 
  gt() %>% 
  cols_label(
    City = "Sensor",
    build_pct = "Percent of city area",
    `City-wide median albedo` = "City-wide median albedo",
    `Albedo increase` = "Albedo increase",
    average = "Temperature Δ (°C)"
  ) %>%
  tab_options(table.font.size = px(13)) %>% 
  as_latex()




## percentile-comparisons-global-cities ####

# Load all buildings
builds <- list.files(here(), pattern = "overture", full.names = F) %>% 
  discard(~ str_detect(.x, "almeria")) %>% 
  keep(~ !str_detect(basename(.x), "PNeo|zenith"))  %>% 
  map_df(read_delim) %>% 
  select(! geometry) %>% 
  mutate(city = str_to_title(str_replace(city, "-", " "))) %>% 
  left_join(city_albedo, by = "city") %>% 
  mutate(area_pct = ntile(area, 10)) 

# Select high-res observations
pneo <- builds %>% 
  select(contains('PNeo'), area, City_area, city, area_pct) %>% 
  rename(Albedo = PNeo,
         Albedo_delta = PNeo_delta,
         Eff_area = PNeo_eff_area) %>% 
  mutate(Sensor = "PNeo")

# select S2 observations
s2 <- builds %>% 
  select(contains('S2'), area, City_area, city, area_pct) %>% 
  rename(Albedo = S2,
         Albedo_delta = S2_delta,
         Eff_area = S2_eff_area) %>% 
  mutate(Sensor = "S2")

# bind together to make long
builds_long <- pneo %>% 
  bind_rows(s2) %>% 
  mutate(city = str_to_title(city),
         city = str_replace(city, "-", " "))

# summarize
all_builds_summary <- builds_long %>% 
  group_by(city, Sensor) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = first(City_area),
            Sensor = first(Sensor),
            build_area = sum(area)) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3),
         build_pct = round(build_area / City_area, 3)) 

# Global high potential buildings
# High impact buildings 10%
high_potential_global <- builds_long %>%
  group_by(city, Sensor) %>%
  filter(Albedo_delta > quantile(builds_long$Albedo_delta, 0.9)) %>%  # Threshold from data < 50% overlap between S2 & PNeo
  ungroup() 

high_potential_global_summary <- high_potential_global %>% 
  group_by(city, Sensor) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = first(City_area),
            Sensor = first(Sensor)) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3)) 

# Histograms of high impact buildings with global threshold
high_potential_global %>% 
  ggplot() +
  geom_histogram(aes(x = Albedo_delta, fill = Sensor), position = "identity") +
  scale_fill_manual(values = c("red", "blue")) +
  facet_wrap(~ city, scales = "free") +
  ggtitle("Top 10% albedo potential")

# City-wise high potential
# High potential buildings 10% city-wise
high_potential_cities <- builds_long %>%
  group_by(city) %>%
  mutate(p90 = quantile(Albedo_delta, 0.9, na.rm = TRUE),
         p90 = round(p90, 2),
         label = paste0(p90, "\nΔα")) %>%  # Compute 90th percentile
  filter(Albedo_delta > p90) %>%  
  ungroup() 

# SI 
high_potential_cities %>% 
  ggplot() +
  geom_histogram(aes(x = Albedo_delta, fill = Sensor), position = "identity") +
  scale_fill_manual(values = c("red", "blue")) +
  facet_wrap(~ city, scales = "free") +
  ggtitle("Top 10% albedo potential by city")

high_potential_cities_summary <- high_potential_cities %>% 
  group_by(city, Sensor) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = first(City_area),
            Sensor = first(Sensor),
            label = first(label)) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3)) 



# By building size
area_summaries <- builds_long %>% 
  group_by(city, Sensor, area_pct) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = first(City_area),
            Alb_change = Eff_area / City_area) %>% 
  mutate(Alb_change = Eff_area / City_area) 

# SI Figure 1
area_summaries %>% 
  ggplot() +
  geom_col(aes(x = factor(area_pct), y = Alb_change, fill = Sensor), position = position_dodge()) +
  scale_fill_manual(values = c("red", "blue")) +
  facet_wrap(~ city, scales = "free") +
  xlab("Decile of area") +
  ylab("City-wide albedo change") +
  theme_bw(base_size = 14)
  

# Impacts of cool roofs on large buildings
large_buildings <- builds_long %>%
  group_by(city) %>% 
  mutate(p90 = quantile(area, 0.9, na.rm = TRUE),
         p90 = round(p90, 0),
         label = paste0(p90, "\nm²")) %>%  # Compute 90th percentile
  filter(area > p90) %>%  
  ungroup() 

large_buildings_summary <- large_buildings %>% 
  group_by(city, Sensor) %>% 
  summarise(Eff_area = sum(Eff_area),
            City_area = first(City_area),
            Sensor = first(Sensor),
            label = first(label)) %>% 
  mutate(Alb_change = round(Eff_area / City_area, 3)) 


# All summaries
scenarios <- (all_builds_summary %>% 
                mutate(Scenario = "All buildings")) %>% 
  bind_rows((large_buildings_summary %>% 
               mutate(Scenario = "Large buildings"))) %>% 
  bind_rows((high_potential_cities_summary %>% 
               mutate(Scenario = "Low albedo buildings"))) %>% 
  mutate(Scenario = fct_relevel(Scenario, "All buildings", "Large buildings", "Low albedo buildings")) %>% 
  left_join(city_albedo) 

random_colors <- kelly.colors(14)[3:14]
names(random_colors) <- unique(eff_area_long$City)

labels <- paste0(scenarios$city, " (", round(scenarios$build_pct, 2), "%)") %>% 
  unique()
labels <- labels[1:11]
names(labels) <- unique(scenarios$city)

scenarios_plot <- scenarios %>% 
  filter(Sensor == "PNeo") %>% 
  ggplot() +
  geom_col(aes(x = fct_reorder(city, build_pct), y = Alb_change, fill = city, alpha = Scenario), 
           position = position_dodge(),
           color = "black") +
  scale_x_discrete(labels = labels) +
  scale_fill_manual(values = random_colors,
                    guide = "none") +
  scale_alpha_manual(name = "Cool roof scenario", values = c(0.3, 0.6, 1)) +
  geom_label(aes(x = fct_reorder(city, Albedo), y = Alb_change, label = label, group = Scenario),
             position = position_dodge(width = 0.9), vjust = -0.2, size = 3) +
  geom_point(data = scenarios %>% filter(Sensor == "S2"), 
             aes(x = fct_reorder(city, Albedo), y = Alb_change, group = Scenario),
             position = position_dodge(width = 0.9), 
             color = "black", shape = 21, fill = "white", size = 4, stroke = 1) +
  xlab("") +
  ylab("City-wide albedo change") +
  theme_bw(base_size = 18) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(0.7, 0.8),  # Move legend to a specific location in the plot
    legend.background = element_rect(fill = "white", color = "black"),  # Add border for clarity
    legend.key = element_rect(fill = "white")  # Set key background color to white
  )

# Figure 6 ----------------------------------------------------------------

## temperature change global cities ####

scenarios_plot + temp_change_plot + plot_annotation(tag_levels = 'a') +  plot_layout(widths = c(2, 1))


# SI table 1 --------------------------------------------------------------

S2 <- metadata %>% 
  filter(imager == "S2") %>% 
  rename(Sensor = imager,
         Date = albedoDate) %>% 
  select(! PNeoDate) %>% 
  distinct()

pneo <- metadata %>% 
  filter(imager == "PNeo") %>% 
  rename(Sensor = imager,
         Date = PNeoDate) %>% 
  select(! albedoDate) %>% 
  distinct() %>% 
  mutate(Date = str_remove(Date, "_\\d{2}-\\d{2}-\\d{2}$"))

S2 %>% 
  bind_rows(pneo) %>% 
  mutate(Date = case_when(Date == "2023-05-23" ~ "May 5, 2023", 
                          Date == "2024-05-08" ~ "May 8, 2024",
                          Date == "2024-05-16" ~ "May 16, 2024",
                          Date == "2024-04-30" ~ "April 30, 2024",
                          .default = Date),
         solarElevation = round(solarElevation, 2),
         solarAzimuth = round(solarAzimuth, 2)) %>% 
  gt() %>% 
  cols_align(
    align = "right",
    columns = c(Date)  # unquoted name or tidyselect
  ) %>% 
  cols_align(
    align = "center",
    columns = c(Sensor)  # unquoted name or tidyselect
  ) %>% 
  cols_label(
    solarElevation = "Solar Elevation",
    solarAzimuth = "Solar Azimuth")

neon_zenith %>% 
  mutate(solarElevation = round(90 - solarZenith, 2),
         solarAzimuth = round(solarAzimuth, 2)) %>% 
  select(! solarZenith) %>% 
  gt() %>% 
  cols_label(
    solarElevation = "Solar Elevation",
    solarAzimuth = "Solar Azimuth")

